% Benchmark bar plots: a 2x2 grid of sub-figures, one per metric.
% Each panel is 0.49\linewidth wide; \hfill between the two panels of a row
% pushes them to the left and right margins.
% \captionspaceII is a length defined elsewhere in the project; it adjusts
% the vertical space below each sub-caption.
% Placement is [htbp] rather than a bare [h]: LaTeX silently rewrites [h] to
% [ht] (with a warning) and a float that fits neither spot can get stuck at
% the end of the document.
\begin{figure}[htbp]
	% --- Row 1 ---------------------------------------------------------
	\begin{subfigure}{0.49\linewidth}
		\centering
		\includegraphics[width=\linewidth]{benchmark_16-class-accuracy.pdf}
		%\vspace{\captionspaceBenchmark}
		\caption{OOD accuracy (higher = better).}
		\label{subfig:benchmark_a}
		\vspace{\captionspaceII}
	\end{subfigure}\hfill
	\begin{subfigure}{0.49\linewidth}
		\centering
		\includegraphics[width=\linewidth]{benchmark_16-class-accuracy-difference.pdf}
		%\vspace{\captionspaceBenchmark}
		\caption{Accuracy difference (lower = better).}
		\label{subfig:benchmark_b}
		\vspace{\captionspaceII}
	\end{subfigure}

	% --- Row 2 (the blank line above makes the row break explicit) -----
	\begin{subfigure}{0.49\linewidth}
		\centering
		\includegraphics[width=\linewidth]{benchmark_observed-consistency.pdf}
		%\vspace{\captionspaceBenchmark}
		\caption{Observed consistency (higher = better).}
		\label{subfig:benchmark_c}
		\vspace{\captionspaceII}
	\end{subfigure}\hfill
	\begin{subfigure}{0.49\linewidth}
		\centering
		\includegraphics[width=\linewidth]{benchmark_error-consistency.pdf}
		%\vspace{\captionspaceBenchmark}
		\caption{Error consistency (higher = better).}
		\label{subfig:benchmark_d}
		\vspace{\captionspaceII}
	\end{subfigure}
	% Main caption first, then the label, so \ref resolves to the figure number.
	\caption{Benchmark results for different models, aggregated over datasets.}
	\label{fig:benchmark_barplots}
\end{figure}
